package com.shu.utils;

import com.shu.pojo.Article;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Crawler utility class.
 *
 * @author shu
 * @date 2021/3/23
 */
public class CrawlerUtils {

    /**
     * Prefix that every detail-page path is appended to, e.g.
     * http://www.china-tjftz.gov.cn/html/cntjzymyqn/YSHJ24999/2021-03-11/Detail_584862.html
     */
    private static final String SITE_ROOT = "http://www.china-tjftz.gov.cn/html/cntjzymyqn";

    /**
     * Number of leading characters dropped from each list-page href before it is
     * appended to {@link #SITE_ROOT}.
     * NOTE(review): presumably this strips a leading "../.." from a relative link — confirm
     * against the live markup.
     */
    private static final int HREF_PREFIX_LENGTH = 5;

    /** Timeout in milliseconds for fetching a list page. */
    private static final int LIST_TIMEOUT_MILLIS = 3000;

    /** Timeout in milliseconds for fetching a detail page. */
    private static final int DETAIL_TIMEOUT_MILLIS = 5000;

    /**
     * Shared crawl routine: fetches a list page, follows every element with the CSS class
     * {@code sec-title} to its detail page and builds one {@link Article} per detail page.
     *
     * <p>Entries whose {@code href} is shorter than {@value #HREF_PREFIX_LENGTH} characters
     * (including elements without an {@code href} attribute), and detail pages that lack the
     * {@code zoomtitl} or {@code zoomcon} element, are skipped instead of aborting the crawl
     * with an unchecked exception.
     *
     * @param url address of the list page to crawl
     * @param tid value stored on every produced article via {@code setTid}
     * @return the articles that could be extracted, in list-page order; never {@code null}
     * @throws IOException if the list page or any detail page cannot be fetched or the
     *                     URL is malformed
     */
    public static List<Article> Crawler_Ways(String url,Integer tid) throws IOException {
        List<Article> articles = new ArrayList<>();
        Document listPage = Jsoup.parse(new URL(url), LIST_TIMEOUT_MILLIS);
        Elements titleLinks = listPage.getElementsByClass("sec-title");
        for (Element titleLink : titleLinks) {
            String href = titleLink.attr("href");
            // attr() yields "" when the attribute is missing; substring would throw on it.
            if (href.length() < HREF_PREFIX_LENGTH) {
                continue;
            }
            String detailUrl = SITE_ROOT + href.substring(HREF_PREFIX_LENGTH);
            Article article = fetchArticle(detailUrl, tid);
            if (article != null) {
                articles.add(article);
            }
        }
        return articles;
    }

    /**
     * Downloads one detail page and converts it into an {@link Article}.
     *
     * @param detailUrl absolute address of the detail page
     * @param tid value stored on the article via {@code setTid}
     * @return the populated article, or {@code null} when the page has no element with id
     *         {@code zoomtitl} or no element with id {@code zoomcon}
     * @throws IOException if the page cannot be fetched or the URL is malformed
     */
    private static Article fetchArticle(String detailUrl, Integer tid) throws IOException {
        Document detailPage = Jsoup.parse(new URL(detailUrl), DETAIL_TIMEOUT_MILLIS);
        Element titleElement = detailPage.getElementById("zoomtitl");
        Element contentElement = detailPage.getElementById("zoomcon");
        // getElementById returns null when the id is absent from the page.
        if (titleElement == null || contentElement == null) {
            return null;
        }
        Article article = new Article();
        article.setTitle(titleElement.text());
        article.setContent(contentElement.text());
        article.setTid(tid);
        // The article time is the moment of crawling, not a date read from the page.
        article.setTime(new Date());
        return article;
    }

    /**
     * Business crawler: crawls the YSHJ24999 list page with tid 2.
     *
     * @return the extracted articles
     * @throws IOException if a page cannot be fetched
     */
    public static List<Article> Business_Crawler() throws IOException {
        return Crawler_Ways(
                "http://www.china-tjftz.gov.cn/html/cntjzymyqn/YSHJ24999/List/list_0.htm", 2);
    }

    /**
     * Government affairs crawler: crawls the TJSJ25012 list page with tid 1.
     *
     * @return the extracted articles
     * @throws IOException if a page cannot be fetched
     */
    public static List<Article> Government_Crawler() throws IOException {
        return Crawler_Ways(
                "http://www.china-tjftz.gov.cn/html/cntjzymyqn/TJSJ25012/List/list_0.htm", 1);
    }

    /**
     * Regulations crawler: crawls the ZCFG24994 list page with tid 3.
     *
     * @return the extracted articles
     * @throws IOException if a page cannot be fetched
     */
    public static List<Article> Laws_Crawler() throws IOException {
        return Crawler_Ways(
                "http://www.china-tjftz.gov.cn/html/cntjzymyqn/ZCFG24994/List/list_0.htm", 3);
    }

    /**
     * Policies and regulations crawler: crawls the ZCFG24994 list page with tid 4.
     *
     * <p>NOTE(review): this is the same list URL that {@link #Laws_Crawler()} uses, so both
     * methods return the same articles under different tids. It may be a copy-paste slip —
     * confirm the intended list page for this category.
     *
     * @return the extracted articles
     * @throws IOException if a page cannot be fetched
     */
    public static List<Article> Industry_Crawler() throws IOException {
        return Crawler_Ways(
                "http://www.china-tjftz.gov.cn/html/cntjzymyqn/ZCFG24994/List/list_0.htm", 4);
    }

    /**
     * Headline news crawler: crawls the YWZX24993 list page with tid 5.
     *
     * @return the extracted articles
     * @throws IOException if a page cannot be fetched
     */
    public static List<Article> News_Crawler() throws IOException {
        return Crawler_Ways(
                "http://www.china-tjftz.gov.cn/html/cntjzymyqn/YWZX24993/List/list_0.htm", 5);
    }

}
